package com.lb.bigdata.spark.core.p1

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark word-count driver intended to run against a remote cluster.
 *
 * Usage: spark-submit ... <inputpath>
 * Reads text from the given (HDFS) path, splits lines on whitespace,
 * counts word occurrences, prints the result, and stops the context.
 */
object ScalaRemoteWordCountOps {
    def main(args: Array[String]): Unit = {
        // Exactly one argument (the input path) is required.
        if(args == null || args.length < 1) {
            println("Usage: <inputpath>")
            System.exit(-1)
        }
        // Accept the external parameter (input path on HDFS).
        val Array(inputpath) = args

        // Master/deploy mode are expected to come from spark-submit,
        // hence no setMaster here.
        val conf = new SparkConf()
                .setAppName("ScalaRemoteWordCount")
        val sc = new SparkContext(conf)

        try {
            val lines = sc.textFile(inputpath) // read from HDFS
            println("分区个数为: " + lines.getNumPartitions)
            val words = lines.flatMap(_.split("\\s+"))
            val ret = words.map((_, 1)).reduceByKey(_+_)
            // NOTE(review): foreach(println) runs on the executors, so in
            // cluster mode the output lands in executor stdout, not the
            // driver console; use ret.collect().foreach(println) if the
            // result must appear on the driver (only safe for small results).
            ret.foreach(println)
        } finally {
            // Always release the SparkContext, even if the job fails;
            // previously an exception in the job body leaked the context.
            sc.stop()
        }
    }
}
